RRPlot and the Colon data set

Libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
#library(corrplot)
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
#source("~/GitHub/FRESA.CAD/R/PoissonEventRiskCalibration.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

The data set

data(cancer)
colon <- subset(colon,etype==1)
colon$etype <- NULL
rownames(colon) <- colon$id
colon$id <- NULL
colon <- colon[complete.cases(colon),]
time <- colon$time
status <- colon$status
data <- colon
data$time <- NULL
data$study <- NULL
table(data$status)

0 1 442 446

dataColon <- as.data.frame(model.matrix(status~.*age,data))
dataColon$`(Intercept)` <- NULL
dataColon$time <- time/365
dataColon$status <- status
colnames(dataColon) <-str_replace_all(colnames(dataColon),":","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\.","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\+","_")
data <- NULL

trainsamples <- sample(nrow(dataColon),0.7*nrow(dataColon))
dataColonTrain <- dataColon[trainsamples,]
dataColonTest <- dataColon[-trainsamples,]


pander::pander(table(dataColonTrain$status))
0 1
306 315
pander::pander(table(dataColonTest$status))
0 1
136 131

Modeling

ml <- BSWiMS.model(Surv(time,status)~1,data=dataColonTrain,loops=20,NumberofRepeats = 5)

[+++++++-+++-+++-+++-].

sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower HR upper u.Accuracy
nodes 0.046673 1.032 1.048 1.063 0.602
age_node4 0.002418 1.001 1.002 1.003 0.597
node4 0.167354 1.106 1.182 1.264 0.597
age -0.004939 0.993 0.995 0.997 0.498
age_nodes 0.000222 1.000 1.000 1.000 0.607
age_extent 0.001202 1.001 1.001 1.002 0.539
extent 0.271263 1.139 1.312 1.511 0.552
rxLev_5FU_age -0.003349 0.995 0.997 0.998 0.575
rxLev_5FU -0.138633 0.807 0.871 0.940 0.575
adhere 0.144162 1.034 1.155 1.290 0.533
age_adhere 0.000621 1.000 1.001 1.001 0.533
perfor 0.037095 1.006 1.038 1.071 0.507
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
nodes 0.600 0.622 0.604 0.599 0.622
age_node4 0.513 0.599 0.601 0.510 0.602
node4 0.584 0.608 0.601 0.583 0.610
age 0.604 0.618 0.498 0.606 0.619
age_nodes 0.572 0.614 0.609 0.571 0.615
age_extent 0.625 0.618 0.539 0.626 0.619
extent 0.630 0.622 0.547 0.631 0.622
rxLev_5FU_age 0.615 0.622 0.572 0.616 0.622
rxLev_5FU 0.612 0.611 0.572 0.614 0.613
adhere 0.617 0.618 0.538 0.618 0.618
age_adhere 0.612 0.618 0.538 0.613 0.619
perfor 0.597 0.604 0.514 0.601 0.607
  IDI NRI z.IDI z.NRI Delta.AUC Frequency
nodes 0.03643 0.3877 6.20 5.15 0.023347 1.0
age_node4 0.03512 0.3988 5.38 5.78 0.092336 1.0
node4 0.03310 0.3874 5.19 5.63 0.027826 1.0
age 0.02811 0.2711 4.80 3.42 0.012838 0.2
age_nodes 0.02438 0.3120 4.75 4.11 0.044027 1.0
age_extent 0.02152 0.1819 4.11 3.31 -0.007049 0.2
extent 0.01793 0.1634 3.76 2.92 -0.008707 1.0
rxLev_5FU_age 0.01602 0.2893 3.64 3.90 0.006537 1.0
rxLev_5FU 0.01550 0.2893 3.54 3.90 -0.001419 1.0
adhere 0.00597 0.1358 2.55 2.39 0.000426 1.0
age_adhere 0.00550 0.1776 2.53 3.05 0.006116 0.2
perfor 0.00522 0.0564 2.31 2.18 0.006162 0.2

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years

index <- predict(ml,dataColonTrain)
timeinterval <- round(2*mean(subset(dataColonTrain,status==1)$time),0)
timeinterval <- 2

h0 <- sum(dataColonTrain$status & dataColonTrain$time <= timeinterval)
h0 <- h0/sum((dataColonTrain$time > timeinterval) | (dataColonTrain$status==1))

rdata <- cbind(dataColonTrain$status,ppoisGzero(index,h0))

rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataColonTrain$time,
                     title="Raw Train: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

By Risk Categories

obsexp <- rrAnalysisTrain$OERatio$atThrEstimates
pander::pander(obsexp)
  Observed L.CI H.CI Expected pvalue
Total 315 281.2 351.8 458.0 1.87e-12
low 174 149.1 201.9 281.3 7.74e-12
90% 45 32.8 60.2 60.2 5.27e-02
80% 96 77.8 117.2 103.3 5.22e-01
maxx <- 1.1*max(c(obsexp$Observed,obsexp$Expected))
minx <- 0.9*min(c(obsexp$Observed,obsexp$Expected))

plot(obsexp$Expected,obsexp$Observed,
     xlim=c(minx,maxx),
     ylim=c(minx,maxx),
     main="Expected vs Observed",
     ylab="Observed",
     xlab="Expected",
     col=rainbow(nrow(obsexp)),
     log="xy")

errbar(obsexp$Expected,obsexp$Observed,obsexp$L.CI,obsexp$H.CI,add=TRUE,pch=0,errbar.col=rainbow(nrow(obsexp)),cex=0.75)
lines(x=c(1,maxx),y=c(1,maxx),lty=2)
text(obsexp$Expected,obsexp$Observed,rownames(obsexp),pos=2,cex=0.75)

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.440 0.341 0.320 0.218 0.12136 0.4999
RR 1.688 1.669 1.699 3.047 1.00000 1.7036
RR_LCI 1.467 1.442 1.450 1.852 0.00000 1.4729
RR_UCI 1.943 1.930 1.992 5.012 0.00000 1.9705
SEN 0.305 0.448 0.584 0.959 1.00000 0.1873
SPE 0.895 0.797 0.683 0.193 0.00654 0.9510
BACC 0.600 0.623 0.634 0.576 0.50327 0.5691
NetBenefit 0.114 0.175 0.223 0.375 0.43964 0.0709
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.688 0.614 0.768 1.87e-12
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.18 1.18 1.16 1.2
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.48 1.48 1.48 1.49
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.659 0.66 0.631 0.69
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.688 0.647 0.729
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.305 0.254 0.359
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.895 0.856 0.927
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.44 0.341
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 78.265578 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 418 174 236.3 16.41 66.34
class=1 75 45 33.5 3.97 4.45
class=2 128 96 45.3 56.90 67.24

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataColonTrain,"status","time")

( 3.114539 , 3182.293 , 1.031662 , 315 , 524.6601 )

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 TimeInterval=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain TimeInterval
0.701 1.57 3.11

The RRplot() of the calibrated model

rCaldata <- cbind(dataColonTrain$status,calprob$prob)


rrAnalysisCalTrain <- RRPlot(rCaldata,atRate=c(0.90,0.80),
                     timetoEvent=dataColonTrain$time,
                     title="Calibrated Train: Colon",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=calprob$timeInterval)

By Risk Categories

obsexp <- rrAnalysisCalTrain$OERatio$atThrEstimates
pander::pander(obsexp)
  Observed L.CI H.CI Expected pvalue
Total 315 281.2 351.8 528.7 1.13e-23
low 174 149.1 201.9 324.8 6.53e-20
90% 45 32.8 60.2 69.5 2.18e-03
80% 96 77.8 117.2 119.2 3.13e-02
maxx <- 1.1*max(c(obsexp$Observed,obsexp$Expected))
minx <- 0.9*min(c(obsexp$Observed,obsexp$Expected))

plot(obsexp$Expected,obsexp$Observed,
     xlim=c(minx,maxx),
     ylim=c(minx,maxx),
     main="Cal. Expected vs Observed",
     ylab="Observed",
     xlab="Expected",
     col=rainbow(nrow(obsexp)),
     log="xy")

errbar(obsexp$Expected,obsexp$Observed,obsexp$L.CI,obsexp$H.CI,add=TRUE,pch=0,errbar.col=rainbow(nrow(obsexp)),cex=0.75)
lines(x=c(1,maxx),y=c(1,maxx),lty=2)
text(obsexp$Expected,obsexp$Observed,rownames(obsexp),pos=2,cex=0.75)

Calibrated Train Performance

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.440 0.341 0.320 0.218 0.12136 0.4999
RR 1.688 1.669 1.699 3.047 1.00000 1.7036
RR_LCI 1.467 1.442 1.450 1.852 0.00000 1.4729
RR_UCI 1.943 1.930 1.992 5.012 0.00000 1.9705
SEN 0.305 0.448 0.584 0.959 1.00000 0.1873
SPE 0.895 0.797 0.683 0.193 0.00654 0.9510
BACC 0.600 0.623 0.634 0.576 0.50327 0.5691
NetBenefit 0.114 0.175 0.223 0.375 0.43964 0.0709
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.688 0.614 0.768 1.87e-12
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.18 1.18 1.16 1.2
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.48 1.48 1.48 1.49
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.659 0.66 0.631 0.69
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.688 0.647 0.729
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.305 0.254 0.359
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.895 0.856 0.927
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.44 0.341
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 78.265578 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 418 174 236.3 16.41 66.34
class=1 75 45 33.5 3.97 4.45
class=2 128 96 45.3 56.90 67.24

Evaluating on the test set

The calibrated h0 and timeinterval were estimated on the training set

index <- predict(ml,dataColonTest)
rdata <- cbind(dataColonTest$status,ppoisGzero(index,h0))

rrAnalysisTest <- RRPlot(rdata,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=dataColonTest$time,
                     title="Test: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.44 @:0.341 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.4400 0.341 0.298 0.211 0.15409 0.5014
RR 1.6671 1.654 2.146 9.398 1.00000 1.6421
RR_LCI 1.3284 1.309 1.573 1.394 0.00000 1.2678
RR_UCI 2.0922 2.089 2.928 63.366 0.00000 2.1271
SEN 0.2824 0.473 0.748 0.992 1.00000 0.1450
SPE 0.8971 0.765 0.581 0.125 0.00735 0.9559
BACC 0.5898 0.619 0.664 0.559 0.50368 0.5505
NetBenefit 0.0974 0.170 0.277 0.368 0.39854 0.0486
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.663 0.554 0.787 6.04e-07
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.02 1.02 0.987 1.04
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.46 1.46 1.45 1.47
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.662 0.663 0.614 0.71
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.699 0.636 0.761
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.282 0.207 0.368
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.897 0.833 0.943
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.44 0.341
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 28.304827 on 2 degrees of freedom, p = 0.000001
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 174 70 94.7 6.45 23.43
class=1 42 24 18.2 1.85 2.15
class=2 51 37 18.1 19.76 23.11

Cross-Validation

Here we will cross validate the training set and evaluate also on the testing set. The h0 and the timeinterval are the ones estimated on the calibration process

rcv <- randomCV(theData=dataColonTrain,
                theOutcome = Surv(time,status)~1,
                fittingFunction=BSWiMS.model, 
                trainFraction = 0.75,
                repetitions=50,
                classSamplingType = "Pro",
                testingSet=dataColonTest
         )

.[+++++-].[+++++].[+++++].[++].[++-].[+++++-].[+++-].[++++-].[++++++].[++++-]10 Tested: 857 Avg. Selected: 9 Min Tests: 1 Max Tests: 10 Mean Tests: 4.935823 . MAD: 0.4705305

.[+++].[+++].[++++-].[++++++].[+++-].[++++-].[+++].[+++++].[++–].[++++]20 Tested: 887 Avg. Selected: 8.7 Min Tests: 1 Max Tests: 20 Mean Tests: 9.537768 . MAD: 0.4697102

.[+++-].[+++-].[++++++].[+++-].[++++].[++++++].[+++-].[++++].[++++].[++++++]30 Tested: 888 Avg. Selected: 8.833333 Min Tests: 2 Max Tests: 30 Mean Tests: 14.29054 . MAD: 0.4700908

.[++++].[+++-].[++++-].[+++-].[++++].[+++-].[+++-].[+++-].[+++++++].[++++++++–]40 Tested: 888 Avg. Selected: 8.85 Min Tests: 3 Max Tests: 40 Mean Tests: 19.05405 . MAD: 0.4699049

.[+++-].[+++].[+++++].[+++].[+++++].[++++].[+++-].[+++-].[+++++].[+++-]50 Tested: 888 Avg. Selected: 8.9 Min Tests: 5 Max Tests: 50 Mean Tests: 23.81757 . MAD: 0.4698109

stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]

bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)

rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names

rrAnalysisCVTest <- RRPlot(rdatacv,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=times,
                     title="CV Test: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

CV Test Performance

pander::pander(t(rrAnalysisCVTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.44 @:0.341 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.441 0.341 0.319 0.216 0.13011 0.5002
RR 1.656 1.564 1.682 2.814 1.00000 1.5915
RR_LCI 1.467 1.382 1.474 1.645 0.00000 1.3868
RR_UCI 1.869 1.771 1.918 4.815 0.00000 1.8264
SEN 0.276 0.424 0.561 0.975 1.00000 0.1570
SPE 0.903 0.785 0.699 0.109 0.00452 0.9480
BACC 0.589 0.604 0.630 0.542 0.50226 0.5525
NetBenefit 0.100 0.158 0.211 0.368 0.42815 0.0529
pander::pander(t(rrAnalysisCVTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.665 0.605 0.73 3.32e-20
pander::pander(t(rrAnalysisCVTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.12 1.12 1.1 1.13
pander::pander(t(rrAnalysisCVTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.46 1.46 1.46 1.47
pander::pander(rrAnalysisCVTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.65 0.65 0.623 0.673
pander::pander(t(rrAnalysisCVTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.677 0.642 0.712
pander::pander((rrAnalysisCVTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.276 0.235 0.32
pander::pander((rrAnalysisCVTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.903 0.871 0.929
pander::pander(t(rrAnalysisCVTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.44 0.341
pander::pander(rrAnalysisCVTest$surdif,caption="Logrank test")
Logrank test Chisq = 93.033240 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 606 258 335.7 18.00 73.38
class=1 116 65 51.6 3.46 3.92
class=2 166 123 58.6 70.67 82.08